##################################
# District size -- grid-cell RDD
#
#
# Replication Material for:
# Continuity or Change? (In)direct Rule in British and French Colonial Africa
# 
# Carl Mueller-Crepon, 2020
# International Organization
#
# File Description:
# District size RDD analysis
#
# Called from indrule_analysis_all.R 
#
##################################


# Libraries
library(lfe)
library(ggplot2)
library(stargazer)
library(MASS)
library(plyr)


# Paths
fig.path = "figures/governance/"
tab.path = "tables/governance/"

# Output type
output.type <- "latex"


##### LaTeX Functionalities
source("scripts/functions/analysis_functions.R")


# Balance table
source("scripts/functions/balance_table.R")



# Load Data

# ... all points (main spec)
load("data/pts_rdd_govscale.Rdata")

# ... govscale data for subsetting
load("data/data_governance.RData")



# Border-segments
points$dist.bucket <- cut(points$dist.coast, seq(0,10000, by = 110))

# Prepare

## Cowcodes of varying subsets
west.africa <- c(402:475,483)
coast.west.africa <- c(404,420,433,435,438,452,481,434,437,452,461,471,475)

## Data (for balance tests and main estimation)
balance.data <- points[(points$colfrnc == 1 | points$colbrit == 1) & !is.na(points$colbrit) &
                         points$cowcode %in% data.dist$countries_cowid,]

balance.data[balance.data$fbperp.id %in% c("452.461", "471.475") & !is.na(balance.data$fbperp.id),
             c("fbperp.id", "fbperp.dist")] <- NA


## Variables for balance tests
balance.vars <- c( "log(dist.coast)" ,"log(river.dist)" ,"log(1+pop.1880)" ,  "log((group.pop.1880+1)/group.area)"  ,
                   paste0("v",c(4,5,28),".num"),
                   "medianaltitude","medianslope","temperaturemean","evapotranspiration",  
                   "precipitation", "ppetratio", "suit", "max.cash.crop" )
balance.var.labs <- c("Distance to coast (log)", "Distance to nav. river (log)", "Population density (log)",  "Ethnic groups' pop. dens. (log)", 
                      "Dependence on agriculture", "Dependence on husbandry", "Intensity of agriculture", 
                      "Altitude", "Slope", "Temperature", "Evapotranspiration",
                      "Precipitation", "Evapotransp. / precipitation", "Suitability for agr.", "Cash crop suitability")
balance.notes <- "$^{*}$p$<$0.1; $^{**}$p$<$0.05; $^{***}$p$<$0.01. Standard errors are clustered on the province-level."

############################
# Specification details ####

# Treatment
treat <- c("v33.num + v33.num_colfrnc")
balance.data$v33.num_colfrnc <- balance.data$v33.num * balance.data$colfrnc

# Cluster
se.cluster <- "province.id"

# Fe Main
fe.main <- " cowcode "

# Notes
latex.notes.add <- function(width = .95){
  paste0("\\parbox[t]{",width,"\\textwidth}{\\textit{Notes:} OLS models. Standard errors are clustered on the province-level.
         Baseline controls include the local population density, ethnic groups'  population density, 
         and the distance to the coast as well as the closest navigable river. 
         Nature controls consist of median altitude and slope, mean annual temperature, precipitation
         and evapotranspiration, the ratio of the two, agricultural suitability, and soils' suitability for cash crop production. 
         Ethnic controls are the reliance on agriculture and pastoralism, as well as the intensity of agricultural activities. 
         Additionally, all covariates are interacted with `French rule'.
         Significance codes: $^{*}$p$<$0.1; $^{**}$p$<$0.05; $^{***}$p$<$0.01.}")}


# Specifications combines
spec.ls <- list(list(add = c(), 
                     fe = paste0(fe.main),
                     cutoff = "All"),
                list(add = c("factor(colbrit):fbperp.dist", "factor(colbrit):fbperp.dist:v33.num",
                             "factor(fbperp.id):v33.num"),
                     fe = paste0(fe.main, "+ factor(fbperp.id):dist.bucket"),
                     cutoff = 5),
                list(add = c("factor(colbrit):fbperp.dist", "factor(colbrit):fbperp.dist:v33.num",
                             "factor(fbperp.id):v33.num"), #, "I(v33.num*fbperp.dist)*colbrit"),
                     fe = paste0(fe.main, "+ factor(fbperp.id):dist.bucket"),
                     cutoff = 2))

##############################
# Balance Tests
# Appendix Table A18
##############################

# Estimate
model.ls <- lapply(spec.ls, function(s){
    lapply(balance.vars, function(v){
      # print(v)
      if(is.numeric(s$cutoff)){
        df <- balance.data[balance.data$fbperp.dist < s$cutoff,]
      } else {
        df <- balance.data
      }
      felm(make_form(dv = v, expl = c(treat, s$add), 
                     fe = s$fe, iv = "0", se = se.cluster),
           data = df, keepX = F, keepCX = F, keepModel = F)
    })
})

# Add rows
add.rows <- list(latex.any.row("RD-Design", c("no", "yes","yes")),
                 latex.any.row("Cutoff (dec. degrees)", c("--", "5","2.5")),
                 latex.any.row("Obs", unlist(lapply(model.ls, function(m){m[[1]]$N}))),
                 latex.any.row("British", unlist(lapply(model.ls, function(m){sum(m[[1]]$X[,"colbrit"])}))),
                 latex.any.row("French", unlist(lapply(model.ls, function(m){sum(m[[1]]$X[,"colbrit"] == 0)}))))

# Print
fileConn<-file(file.path(tab.path, paste0("govscale.rdd.balance", ".tex")))
writeLines(
  balance_table_tex(model.ls = model.ls, treat.var = "v33.num_colfrnc", treat.var.lab = "Centr. $\\times$ French",
                    balance.var.label = balance.var.labs,
                    digits = 3, stars = T, std.coef = T,
                    label = "govscale.rdd.balance", col.sep.width = "5pt", title = "Balance test: Grid-cell level", 
                    col.header = c("All", "RDD", "RDD"), 
                    notes = balance.notes, add.rows = add.rows), 
  fileConn)
close(fileConn)

###############################
# Main RDD Estimation
# Appendix Table A19
###############################
stub <- "govscale.rdd.main"

# ... specifications
spec.ls <- list(list(add = c(treat), 
                     fe = paste0(fe.main),
                     cutoff = "All"),
                list(add = c(treat, paste0("colbrit*",balance.vars)), 
                     fe = paste0(fe.main),
                     cutoff = "All"),
                list(add = c("v33.num_colfrnc", "factor(colbrit):fbperp.dist", "factor(colbrit):fbperp.dist:v33.num",
                             "factor(fbperp.id):v33.num"), #, "I(v33.num*fbperp.dist)*colbrit"),
                     fe = paste0(fe.main, "+ factor(fbperp.id):dist.bucket"),
                     cutoff = 5),
                list(add = c("v33.num_colfrnc", "factor(colbrit):fbperp.dist", "factor(colbrit):fbperp.dist:v33.num",
                             "factor(fbperp.id):v33.num", paste0("colbrit*",balance.vars)), #, "I(v33.num*fbperp.dist)*colbrit"),
                     fe = paste0(fe.main, "+ factor(fbperp.id):dist.bucket"),
                     cutoff = 5),
                list(add = c("v33.num_colfrnc", "factor(colbrit):fbperp.dist", "factor(colbrit):fbperp.dist:v33.num",
                             "factor(fbperp.id):v33.num"), #, "I(v33.num*fbperp.dist)*colbrit"),
                     fe = paste0(fe.main, "+ factor(fbperp.id):dist.bucket"),
                     cutoff = 2.5),
                list(add = c("v33.num_colfrnc", "factor(colbrit):fbperp.dist", "factor(colbrit):fbperp.dist:v33.num",
                             "factor(fbperp.id):v33.num",
                             paste0("colbrit*",balance.vars)), #, "I(v33.num*fbperp.dist)*colbrit"),
                     fe = paste0(fe.main, "+ factor(fbperp.id):dist.bucket "),
                     cutoff = 2.5))


# ... estimate
model.ls <- lapply(spec.ls, function(s){
  if(is.numeric(s$cutoff)){
    df <- balance.data[balance.data$fbperp.dist < s$cutoff & !is.na(balance.data$fbperp.dist),]
  } else {
    df <- balance.data
  }
  felm(make_form(dv = "log(area)", expl = c(s$add), 
                 fe = s$fe, iv = "0", se = se.cluster),
       data = df)
})


# ... Add rows
mean.dv <- latex.any.row("Mean DV", round_any(unlist(lapply(model.ls, function(m){mean(m$response)})), 0.01) )
add.lines <- list(latex.any.row("Colony FE: ",rep("yes",length(model.ls))),
                  latex.any.row("Border-region FE: ",c("no","no",rep("yes",length(model.ls)-2))),
                  latex.any.row("Dist2border $\\times$ French: ",c("no","no",rep("yes",length(model.ls)-2))),
                  latex.any.row("Dist2border $\\times$ French ",c(rep("",length(model.ls)-2))),
                  latex.any.row("$\\times$ Precol. centr.: ",c("no","no",rep("yes",length(model.ls)-2))),
                  latex.any.row("Dist. cutoff (dec. degr.): ",c("--","--",5,5,2.5,2.5)),
                  latex.any.row("Baseline controls: ",rep(c("no","yes"),length(model.ls)/2)),
                  latex.any.row("Nature controls: ",rep(c("no","yes"),length(model.ls)/2)),
                  latex.any.row("Ethnic controls: ",rep(c("no","yes"),length(model.ls)/2)),
                  mean.dv)

# ... save
fileConn<-file(file.path(tab.path, paste0(stub, ".tex")))
writeLines(
  stargazer(model.ls,
            title="Precolonial centralization and the size of districts: Grid-cells, RDD at French-British borders",
            dep.var.caption = "",dep.var.labels.include = FALSE,
            column.labels = collab_w_ruler(column.labels = c("All cells", "Regression Discontinuity Design"), 
                                           column.separate = c(2,4), trim = 14),
            column.separate = c(2,4),
            keep= 1:2, covariate.labels = c("Precol. centralization", "Precol. centr. $\\times$ French"),
            notes.align = "l",label=paste0("tab.",stub),align =T,
            add.lines = add.lines, 
            digits = 3, intercept.top = T,intercept.bottom = F,
            omit.stat = c("rsq","res.dev","ser"),column.sep.width ="-12pt",
            notes = latex.notes.add(.95), notes.label = "", notes.append = F,
            type  = "latex"), 
  fileConn)
close(fileConn)


#######################
# RDD Plots: Discrete and continous distance values
# Figures 9 (main paper) and A12 (Appendix) 
#######################

# ... RDD data
plot.rdd.df <- balance.data[balance.data$fbperp.dist < 5 & !is.na(balance.data$fbperp.dist),]

# ... make distance to border bind
plot.rdd.df$dist.bins <- cut(plot.rdd.df$fbperp.dist, seq(0,100, by = .5))
plot.rdd.df$pts.id <- c(1:nrow(plot.rdd.df))

# ... estimate model
m <- felm(log(area) ~ factor(colbrit):dist.bins:v33.num + factor(colbrit):fbperp.dist + I((fbperp.id == 437.452)*v33.num)  | 
            cowcode + factor(fbperp.id):dist.bucket | 0 | province.id, ### Carefull: distbucket or bin??
          data = plot.rdd.df)
# summary(m)

# ... extract coefs
plot.df <- data.frame(name = rownames(m$coefficients), coef = m$coefficients[,1], se = diag(m$clustervcv)^.5,
                      stringsAsFactors = F)
plot.df <- plot.df[grepl("v33", plot.df$name),]
plot.df <- plot.df[!grepl("fbperp.id", plot.df$name),]
plot.df$british <- ifelse(grepl("factor(colbrit)", plot.df$name, fixed = T), 
                          ifelse(grepl("factor(colbrit)0", plot.df$name, fixed = T), 0, 1), NA)
plot.df$distance <- rep(na.omit(unique(plot.rdd.df$dist.bins))[order(na.omit(unique(plot.rdd.df$dist.bins)))],
                        each = 2)
plot.df$distance.num <- as.numeric(plot.df$distance )
plot.df$distance.num <- seq(0.25, 10.25, by = .5)[plot.df$distance.num]
plot.df$run.var <- ifelse(plot.df$british == 1, plot.df$distance.num, plot.df$distance.num * -1)

# Save discrete version
png(file.path(fig.path, "govscale.rdd.discrete.png"), width = 5, height = 3, res = 400, unit = "in")
g <- ggplot(plot.df, aes(x = run.var, y = coef, group = british)) + 
  geom_point(aes(col = factor(british))) + geom_line(aes(col = factor(british))) + 
  geom_errorbar(aes(ymin = coef - 1.96*se, 
                    ymax = coef + 1.96*se, col = factor(british)), 
                width=.01) +
  geom_vline(xintercept = 0, lty = 3) +
  xlab("Distance to border \n (bins of .5 dec. degrees)") +
  ylab("Marginal effect of \n precol. centralization") +
  annotate("text", x = -2.5, y = -.45, label = "French", col = "blue") +
  annotate("text", x = 2.5, y = -.45, label = "British", col = "red") + 
  scale_color_manual(breaks=c("british","french"),
                     values = c(`1` = "red", `0`  = "blue")) +
  scale_x_continuous(breaks = seq(-5, 5, length.out = 5), labels = abs(seq(-5, 5, length.out = 5))) +
  NULL
plot(g)
dev.off()

# ... continuous model

# ... --- estimate
m <- felm(log(area) ~ factor(colbrit):v33.num + factor(colbrit):fbperp.dist:v33.num + 
            factor(colbrit):fbperp.dist + I(factor(fbperp.id)==437.452):v33.num  | 
            cowcode + factor(fbperp.id):dist.bucket | 0 | province.id,
          data = plot.rdd.df)
# summary(m)

# ... --- get continuous coef of v33.num * brit / french
cont.df <- NULL
set.seed(250589)
sim <- mvrnorm(n=10000,mu=m$beta,Sigma=m$clustervcv)
for(b in c(0,1)){
  for(d in seq(0,5, by = .1)){
    sim.val <- sim[,paste0("factor(colbrit)",b,":v33.num")] +
      sim[,paste0("factor(colbrit)",b,":v33.num:fbperp.dist")]*d
    
    cont.df <- rbind(cont.df, c(fbperp.dist = d, coef = mean(sim.val),
                                british = b,
                                lb = quantile(sim.val,c(0.025)),
                                ub = quantile(sim.val,c(0.975))))
  }
}
cont.df <- data.frame(cont.df)
colnames(cont.df) <- c("fbperp.dist", "coef", "british", "lb", "ub")
cont.df$run.var <- ifelse(cont.df$british == 1, cont.df$fbperp.dist, cont.df$fbperp.dist * -1)

# ... coef annotate
se <- (m$clustervcv["factor(colbrit)1:v33.num","factor(colbrit)1:v33.num"] +
         m$clustervcv["factor(colbrit)0:v33.num","factor(colbrit)0:v33.num"] -
         2*m$clustervcv["factor(colbrit)0:v33.num","factor(colbrit)1:v33.num"])^.5
diff <- m$coefficients["factor(colbrit)1:v33.num",1] - m$coefficients["factor(colbrit)0:v33.num",1]
coef.note <- paste0("Difference = ",
                    format(diff, digits = 3),
                    "** (",format(se, digits = 3),")")

# Save continuous plot
png(file.path(fig.path, "govscale.rdd.continuous.png"), width = 5, height = 3, res = 400, unit = "in")
g <- ggplot(cont.df, aes(x = run.var, y = coef, group = factor(british))) + 
  geom_vline(xintercept = 0, lty = 3) +
  geom_line(aes(col = factor(british))) +
  geom_line(aes(y = lb, col = factor(british)), lty = 2) +
  geom_line(aes(y = ub, col = factor(british)), lty = 2) +
  xlab("Distance to border \n (continuous)") +
  ylab("Marginal effect of \n precol. centralization") +
  geom_line(data = data.frame(x = c(0,0), y = c(.799, .6), british = 0), aes(x = x, y = y),
            arrow = arrow(length=unit(0.20,"cm"),
                          ends="last", type = "closed"),
            col = "black") +
  geom_line(data = data.frame(x = c(0,1), y = c(.8, .8), british = 0), aes(x = x, y = y),
            col = "black") +
  scale_color_manual(breaks=c("british","french"),
                     values = c(`1` = "red", `0`  = "blue")) +
  annotate("text", x = 1.2, y = .8, label = coef.note, hjust = 0, cex = 2.5) +
  annotate("text", x = -2.5, y = -.45, label = "French", col = "blue") +
  annotate("text", x = 2.5, y = -.45, label = "British", col = "red") + 
  scale_x_continuous(breaks = seq(-5, 5, length.out = 5), labels = abs(seq(-5, 5, length.out = 5))) +
  NULL
plot(g)
dev.off()


# Combined plot
png(file.path(fig.path, "govscale.rdd.discrcont.png"), width = 6, height = 3, res = 400, unit = "in")
g <- ggplot(plot.df, aes(x = run.var, y = coef, group = british)) + 
  geom_point(aes(col = factor(british)), alpha = .25) + geom_line(aes(col = factor(british)), alpha = .25) + 
  geom_errorbar(aes(ymin = coef - 1.96*se, 
                    ymax = coef + 1.96*se, col = factor(british)), 
                width=.01, alpha = .25) +
  geom_vline(xintercept = 0, lty = 3) +
  geom_line(data = cont.df, aes(col = factor(british))) +
  geom_line(data = cont.df, aes(y = lb, col = factor(british)), lty = 2) +
  geom_line(data = cont.df, aes(y = ub, col = factor(british)), lty = 2) +
  xlab("Distance to border") +
  ylab("Marginal effect of \n precol. centralization") +
  annotate("text", x = -2.5, y = -.45, label = "French", col = "blue") +
  annotate("text", x = 2.5, y = -.45, label = "British", col = "red") + 
  geom_line(data = data.frame(x = c(0,0), y = c(.799, .6), british = 0), aes(x = x, y = y),
            arrow = arrow(length=unit(0.20,"cm"),
                          ends="last", type = "closed"),
            col = "black") +
  geom_line(data = data.frame(x = c(0,1), y = c(.8, .8), british = 0), aes(x = x, y = y),
            col = "black") +
  scale_color_manual(breaks=c("british","french"),
                     values = c(`1` = "red", `0`  = "blue")) +
  annotate("text", x = 1.2, y = .8, label = coef.note, hjust = 0, cex = 3) +
  scale_x_continuous(breaks = seq(-5, 5, length.out = 5), labels = abs(seq(-5, 5, length.out = 5))) +
  NULL
plot(g)
dev.off()


##########################
# Robustness: over all windowsizes
# Appendix Figure A13
##########################

# ... estimate
spec.ls <- list(list(add = c("factor(colbrit):fbperp.dist", "factor(colbrit):fbperp.dist:v33.num",
                             "factor(fbperp.id):v33.num"),
                     fe = paste0(fe.main, "+ factor(fbperp.id):dist.bucket"),
                     cutoff = 2),
                list(add = c("colbrit:fbperp.dist", paste0("colbrit*",balance.vars)),
                     fe = paste0(fe.main, "+ factor(fbperp.id):dist.bucket"),
                     cutoff = 2))

plot.df <- do.call(rbind, lapply(c(1:length(spec.ls)), function(s){
  do.call(rbind, lapply(seq(5, .5, by = -.1), function(c){
    m <- felm(make_form(dv = "log(area)", expl = c(treat, spec.ls[[s]]$add), 
                   fe = spec.ls[[s]]$fe, iv = "0", se = se.cluster),
         data = balance.data[balance.data$fbperp.dist < c,])
    c(coef = m$coefficients["v33.num_colfrnc", 1],
      se = m$clustervcv["v33.num_colfrnc", "v33.num_colfrnc"]^.5,
      spec = s,
      cutoff = c)
  }))
}))
plot.df <- data.frame((plot.df))
plot.df$spec <- c("(1) Without covariates", "(2) With covariates")[plot.df$spec]

# ... plot
png(file.path(fig.path, "govscale.rdd.cutoff.png"), width = 7, height = 3.5, res = 400, unit = "in")
g <- ggplot(plot.df, aes(x = cutoff, y = coef)) + geom_point() + 
  geom_errorbar(aes(ymin = coef - 1.96*se, 
                     ymax = coef + 1.96*se), 
                 width=.01) +
  facet_wrap(~ spec) + geom_hline(yintercept = 0) +
  xlab("Distance to border cutoff") +
  ylab("French-British difference in \n marginal effect of \n precol. centralization")
plot(g)
dev.off()


#######################
# Robustness: Donut Estimation
# Appendix Table A20
#######################

# ... specifications
spec.ls <- list(list(add = c("v33.num_colfrnc", "factor(colbrit):fbperp.dist", "factor(colbrit):fbperp.dist:v33.num",
                             "factor(fbperp.id):v33.num"), #, "I(v33.num*fbperp.dist)*colbrit"),
                     fe = paste0(fe.main, "+ factor(fbperp.id):dist.bucket"),
                     cutoff = 5),
                list(add = c("v33.num_colfrnc", "factor(colbrit):fbperp.dist", "factor(colbrit):fbperp.dist:v33.num",
                             "factor(fbperp.id):v33.num", paste0("colbrit*",balance.vars)), #, "I(v33.num*fbperp.dist)*colbrit"),
                     fe = paste0(fe.main, "+ factor(fbperp.id):dist.bucket"),
                     cutoff = 5),
                list(add = c("v33.num_colfrnc", "factor(colbrit):fbperp.dist", "factor(colbrit):fbperp.dist:v33.num",
                             "factor(fbperp.id):v33.num"), #, "I(v33.num*fbperp.dist)*colbrit"),
                     fe = paste0(fe.main, "+ factor(fbperp.id):dist.bucket"),
                     cutoff = 2.5),
                list(add = c("v33.num_colfrnc", "factor(colbrit):fbperp.dist", "factor(colbrit):fbperp.dist:v33.num",
                             "factor(fbperp.id):v33.num",
                             paste0("colbrit*",balance.vars)), #, "I(v33.num*fbperp.dist)*colbrit"),
                     fe = paste0(fe.main, "+ factor(fbperp.id):dist.bucket "),
                     cutoff = 2.5))


# ... estimate
model.ls <- lapply(spec.ls, function(s){
  if(is.numeric(s$cutoff)){
    df <- balance.data[balance.data$fbperp.dist > 0.1 & 
                         balance.data$fbperp.dist < s$cutoff & 
                         !is.na(balance.data$fbperp.dist),]
  } else {
    df <- balance.data
  }
  felm(make_form(dv = "log(area)", expl = c(s$add), 
                 fe = s$fe, iv = "0", se = se.cluster),
       data = df)
})


# ... Add rows
mean.dv <- latex.any.row("Mean DV", round_any(unlist(lapply(model.ls, function(m){mean(m$response)})), 0.01) )
add.lines <- list(latex.any.row("Colony FE: ",rep("yes",length(model.ls))),
                  latex.any.row("Border-region FE: ",c(rep("yes",length(model.ls)))),
                  latex.any.row("Dist2border $\\times$ French: ",c(rep("yes",length(model.ls)))),
                  latex.any.row("Dist2border $\\times$ French ",c(rep("",length(model.ls)))),
                  latex.any.row("$\\times$ Precol. centr.: ",c(rep("yes",length(model.ls)))),
                  latex.any.row("Min. dist. (dec. degr.): ",rep(.1, 4)),
                  latex.any.row("Dist. cutoff (dec. degr.): ",c(5,5,2.5,2.5)),
                  latex.any.row("Baseline controls: ",rep(c("no","yes"),length(model.ls)/2)),
                  latex.any.row("Nature controls: ",rep(c("no","yes"),length(model.ls)/2)),
                  latex.any.row("Ethnic controls: ",rep(c("no","yes"),length(model.ls)/2)),
                  mean.dv)
stub <- "govscale.rdd.donut"
fileConn<-file(file.path(tab.path, paste0(stub, ".tex")))
writeLines(
  stargazer(model.ls,
            title="Precolonial centralization and the size of districts: RDD at French-British borders, donut specification",
            dep.var.caption = "",dep.var.labels.include = FALSE,
            column.labels = collab_w_ruler(column.labels = c("Regression Discontinuity Design"), 
                                           column.separate = c(4), trim = 0),
            column.separate = c(4),
            keep= 1, covariate.labels = c("Precol. centr. $\\times$ French"),
            notes.align = "l",label=paste0("tab.",stub),align =T,
            add.lines = add.lines, 
            digits = 3, intercept.top = T,intercept.bottom = F,
            omit.stat = c("rsq","res.dev","ser"),column.sep.width ="3pt",
            notes = latex.notes.add(.95), notes.label = "", notes.append = F,
            type  = "latex"), 
  fileConn)
close(fileConn)

#############################
# Robustness: Across grid cell sizes
# Appendix Figure A14
#############################

# ... specifications
spec.ls <- list(list(add = c(treat), 
                     fe = paste0(fe.main),
                     cutoff = "All"),
                list(add = c(treat, paste0("colbrit*",balance.vars)), 
                     fe = paste0(fe.main),
                     cutoff = "All"),
                list(add = c("v33.num_colfrnc", "factor(colbrit):fbperp.dist", "factor(colbrit):fbperp.dist:v33.num",
                             "factor(fbperp.id):v33.num"), #, "I(v33.num*fbperp.dist)*colbrit"),
                     fe = paste0(fe.main, "+ factor(fbperp.id):dist.bucket"),
                     cutoff = 5),
                list(add = c("v33.num_colfrnc", "factor(colbrit):fbperp.dist", "factor(colbrit):fbperp.dist:v33.num",
                             "factor(fbperp.id):v33.num", paste0("colbrit*",balance.vars)), #, "I(v33.num*fbperp.dist)*colbrit"),
                     fe = paste0(fe.main, "+ factor(fbperp.id):dist.bucket"),
                     cutoff = 5),
                list(add = c("v33.num_colfrnc", "factor(colbrit):fbperp.dist", "factor(colbrit):fbperp.dist:v33.num",
                             "factor(fbperp.id):v33.num"), #, "I(v33.num*fbperp.dist)*colbrit"),
                     fe = paste0(fe.main, "+ factor(fbperp.id):dist.bucket"),
                     cutoff = 2.5),
                list(add = c("v33.num_colfrnc", "factor(colbrit):fbperp.dist", "factor(colbrit):fbperp.dist:v33.num",
                             "factor(fbperp.id):v33.num", paste0("colbrit*",balance.vars)), #, "I(v33.num*fbperp.dist)*colbrit"),
                     fe = paste0(fe.main, "+ factor(fbperp.id):dist.bucket"),
                     cutoff = 2.5))

# ... estimate
model.ls <- unlist(lapply(1:4, function(af){
  print(af)
  # Load data
  data <- readRDS(paste0("data/pts_rdd_govscale_",af,".Rdata"))
  
  # Subset to relevant points
  data <- data[(data$colfrnc == 1 | data$colbrit == 1) & !is.na(data$colbrit) &
                 data$cowcode %in% data.dist$countries_cowid,]
  
  # Make vars
  data$dist.bucket <- cut(data$dist.coast, seq(0,10000, by = 110))
  data$v33.num_colfrnc <- data$v33.num * data$colfrnc
  
  # Estimate
  lapply(spec.ls, function(s){
    if(is.numeric(s$cutoff)){
      df <- data[data$fbperp.dist < s$cutoff & !is.na(data$fbperp.dist),]
    } else {
      df <- data
    }
    felm(make_form(dv = "log(area)", expl = c(s$add), 
                   fe = s$fe, iv = "0", se = se.cluster),
         data = df)
  })
}), recursive = F)


# Get coefficients
coef.df <- data.frame(do.call(rbind, lapply(model.ls, function(m){
  c(coef = m$coefficients["v33.num_colfrnc", 1],
    se = m$clustervcv["v33.num_colfrnc","v33.num_colfrnc"]^.5)
})), stringsAsFactors = F)
coef.df$point_dens <- rep(1:4, each = length(spec.ls))
coef.df$point_dens_num <- as.character(signif(.083 * coef.df$point_dens, 2))
coef.df$spec <- rep(c(1:6), 4)
coef.df$spec.lab <- c("All cells", "All cells\n+ controls",
                      "RDD\n(<5 dec. degr.)", "RDD\n(<5 dec. degr.)\n+ controls",
                      "RDD\n(<2.5 dec. degr.)", "RDD\n(<2.5 dec. degr.)\n+ controls")[coef.df$spec]
coef.df$pos <- coef.df$spec + seq(-.25, .25, length.out = 4)[coef.df$point_dens]

# Plot
png(file.path(fig.path, "govscale.rdd.sizecheck.png"), width = 7, height = 3.5, res = 400, unit = "in")
g <- ggplot(coef.df, aes(x = pos, y = coef, col = point_dens_num, pch = point_dens_num)) + 
  geom_hline(yintercept = 0, lty = 3) +
  geom_point() + 
  geom_errorbar(aes(ymin = coef - 1.96*se, 
                    ymax = coef + 1.96*se)) +
  scale_x_continuous(breaks=unique(coef.df$spec),
                     labels = unique(coef.df$spec.lab)) +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = .5),
        legend.position="top")+
  guides(col=guide_legend(title="Grid cell size (dec. degrees)"),
         pch=guide_legend(title="Grid cell size (dec. degrees)")) +
  xlab("") + ylab("French-British difference in \n marginal effect of \n precol. centralization")
  NULL
print(g)
dev.off()

